--[[

Copyright 2019 NetherEran.
This program is licensed under the MIT license

This provides a markov chain object. This can be used to create
themed nonsense based on input strings.
How it works:
	-cut input strings into word lists by splitting at the space characters
	--put technical START and END words at the start and the end
	-store the propabilities for each word to follow after another word
	
	-to create own sentences, start at the START word and randomly
		walk according to the stored propabilities until the END word
		or a maximum sentence length is reached	
]]



local MAX_SENTENCE_LENGTH = 30 --in words

--we use numeric indices for start and end nodes so input (which is
--strings) can't mess with it
local START = 1
local END = 2

--node related stuff
local node_prototype = {}


local function link(node, to_link)
	node.link_count = node.link_count + 1
	node.links[to_link] = (node.links[to_link] or 0) + 1
end

--returns a node object that stores what it links, where it's linked
--from and the weight of its links
local function MarkovNode()
	local prototype = {}
	prototype.links = {} --word -> weight
	prototype.linked_by = {} --word array
	prototype.link_count = 0
	
	for k, v in pairs(node_prototype)
	do
		prototype[k] = v
	end
	return prototype
end



--graph related stuff
local graph_prototype = {}

--links two words or increases the weight if they're already linked
function graph_prototype:link(word1, word2)
	link(self.nodes[word1], word2)
	local reverse = self.nodes[word2].linked_by
	for _, name in ipairs(reverse)
	do
		if name == word1
		then
			return
		end
	end
	table.insert(reverse, word1)
end

--splits a string at its space characters
local function cutup(str)
	local pieces = {}
	pieces[0] = START
	local i = 1
	for k in string.gmatch(str, "([^%s]+)")
	do
		pieces[i] = k
		i = i + 1
	end
	pieces[i] = END
	
	
	if pieces[1] == END
	then
		return
	else
		return pieces
	end
end

--cuts a line into words and puts them into the graph as nodes and does weight stuff
function graph_prototype:learn_line(line)
	local input = cutup(line)
	if not input
	then
		return
	end
	for i, v in ipairs(input)
	do
		self:link(input[i - 1], v)
	end
end

--read a file and treat each line as a line to learn from
function graph_prototype:learn_from_file(filename)
	for line in io.lines(filename)
	do
		self:learn_line(line)
	end
end

--randomly pick an order of words based on the links and weights
function graph_prototype:randomwalk()
	local path = {}
	local current = START
	local sentence_length = 0
	while current ~= END and sentence_length < MAX_SENTENCE_LENGTH
	do
		sentence_length = sentence_length + 1
		local node = self.nodes[current]
		local rand = math.random(node.link_count + 1)
		
		for k, v in pairs(node.links)
		do
			rand = rand - v
			if rand <= 0
			then
				current = k
				table.insert(path, k)
				break
			end
		end
	end
	path[#path] = nil
	return path
end

--returns a sentense based on a randomwalk
function graph_prototype:get_sentence()
	local path = self:randomwalk()
	return table.concat(path, " ")
end

--counts the nodes of a graph without START and END
function graph_prototype:count_known_words()
	local count = 0
	for name, _ in pairs(self.nodes)
	do
		if not(name == START or name == END)
		then
			count = count + 1
		end
	end
	return count
end



--checks if a table contains a value
local function contains(table, value)
	for k, v in pairs(table)
	do
		if v == value
		then
			return true
		end
	end
	return false
end

--returns dead nodes. A node is dead when it can not be reached from
--START or END can't be reached from it
local function get_dead_nodes(graph)
	--calculate the distance to each node from START
	local sdistances = {}
	sdistances[START] = 0
	local to_visit = {START}
	while #to_visit > 0
	do
		local current = table.remove(to_visit)
		local current_distance = sdistances[current] + 1
		for node, _ in pairs(graph.nodes[current].links)
		do
			if current_distance < (sdistances[node] or math.huge)
			then
				sdistances[node] = current_distance
				if not contains(to_visit, node)
				then
					table.insert(to_visit, node)
				end
			end
		end
	end
	--calculate the distance to each node from END (same as above but backwards)
	local edistances = {}
	edistances[END] = 0
	table.insert(to_visit, END)
	while #to_visit > 0
	do
		local current = table.remove(to_visit)
		local current_distance = edistances[current] + 1
		for _, node in pairs(graph.nodes[current].linked_by)
		do
			if current_distance < (edistances[node] or math.huge)
			then
				edistances[node] = current_distance
				if not contains(to_visit, node)
				then
					table.insert(to_visit, node)
				end
			end
		end
	end
	
	--a node is dead when if and only if it has no distance to START or END
	local dead = {}
	for name, node in pairs(graph.nodes)
	do
		if not (edistances[name] and sdistances[name])
		then
			
			table.insert(dead, name)
		end
	end
	return dead
end

--removes a node and the links to it
local function remove(graph, to_remove)
	--don't remove start or end
	if to_remove == START or to_remove == END
	then
		return
	end
	--remove links to to_remove
	for i, name in ipairs(graph.nodes[to_remove].linked_by)
	do
		local node = graph.nodes[name]
		node.link_count = node.link_count - node.links[to_remove]
		node.links[to_remove] = nil
	end
	--remove notes that to_remove links nodes
	for name, _ in pairs(graph.nodes[to_remove].links)
	do
		local node = graph.nodes[name]
		for i, v in ipairs(node.linked_by)
		do
			if v == to_remove
			then
				table.remove(node.linked_by, i)
				break
			end
		end
	end
	--actually remove node
	graph.nodes[to_remove] = nil
end

--removes a node, and the nodes that become dead by removing it
function graph_prototype:remove_node(to_remove)
	remove(self, to_remove)
	local dead = get_dead_nodes(self)
	for i, name in ipairs(dead)
	do
		remove(self, name)
	end
end

--returns the node that is has appeared the least often
local function get_least_used(nodes)
	local least_uses = math.huge
	local least_used = nil
	
	for name, node in pairs(nodes)
	do
		if node.link_count < least_uses and
			not(name == END or name == START)
		then
			least_uses = node.link_count
			least_used = name
		end
	end
	return least_used
end

--cuts the amount of known words to 'to' words maximum.
function graph_prototype:cull(to)
	local count = self:count_known_words()
	while count > to
	do
		local least_used = get_least_used(self.nodes)
		if least_used
		then
			self:remove_node(least_used)
		end
		count = self:count_known_words()
	end
end

local nodes_meta = {}
--create new MarkovNode when indexed
function nodes_meta.__index(self, key)
	self[key] = MarkovNode()
	return self[key]
end



--returns a MarkovGraph object
local function MarkovGraph()
	local prototype = {}
	prototype.nodes = {} --word -> MarkovNode
	setmetatable(prototype.nodes, nodes_meta)
	
	for k, v in pairs(graph_prototype)
	do
		prototype[k] = v
	end
	
	return prototype
end
return MarkovGraph


